////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// FIGURE 2 Average Assessment by Type in Each Treatment
************************************

* Load the late-rounds data and list the bias and alpha values by treatment,
* first for the high group (highgroup == 1), then for the low group.
* These tables are printed before the sample filter is applied.
use "$data/eoydata_late.dta", clear
foreach par in bias alpha {
    forvalues g = 1(-1)0 {
        tabulate treatmentname `par' if highgroup == `g'
    }
}
drop if filteredsample != 1

* Fitted assessment implied by the weight (alpha) and the shift (bias).
generate predictguess = alpha*mean_theory + (1- alpha)*value + bias

* Plot window for value; stored as constants so they survive the collapse.
generate lbound = 30
generate hbound = 70

* weight = number of observations in each value x group x treatment cell.
generate dummy = 1
egen weight = total(dummy), by(value highgroup treatmentname)

* Cell means. weight is constant within a cell, so its mean is the cell count.
collapse (mean) guess predictguess lbound hbound weight, by(value highgroup treatmentname)


* ---------------------------------------------------------------------------
* Figure 2 panels: mean assessment (guess) against type (value), one panel per
* treatment. Each panel overlays, inside the lbound/hbound window:
*   - weighted hollow markers of the cell means (orange = highgroup 1,
*     green = highgroup 0),
*   - the fitted lines predictguess for both groups,
*   - the 45-degree line (value against value),
*   - capped spikes from predictguess to value at value 40 (low group) and
*     value 60 (high group).
* Every panel is saved under $tempgraph for the graph combine calls below.
*
* Changes relative to the four copy-pasted versions this replaces:
*   - the SignalFirst panel title read "SignalFist";
*   - the y-axis title read "Mean Assesment";
*   - the last preserve was never restored, so a later preserve in the same
*     run stopped with "already preserved".
* ---------------------------------------------------------------------------
local fig2_panels Baseline SignalFirst OneGroup NoGroup

* File stub of the saved temp graph for each panel.
local stub_Baseline    "bl"
local stub_SignalFirst "sf"
local stub_OneGroup    "og"
local stub_NoGroup     "ng"

* Annotation text printed in each panel (low group in green, high group in
* orange). These are hard-coded strings.
* NOTE(review): presumably they mirror the bias/alpha tables printed when the
* data are loaded; update by hand if the estimates change.
local lowtxt_Baseline     "B = -1.7, {&omega} = 0.20"
local hightxt_Baseline    "B = 1.8, {&omega} = 0.16"
local lowtxt_SignalFirst  "B = -0.3, {&omega} = 0.18"
local hightxt_SignalFirst "B = 0.2, {&omega} = 0.15"
local lowtxt_OneGroup     "B = 0.4, {&omega} = 0.09"
local hightxt_OneGroup    "B = -0.2, {&omega} = 0.16"
local lowtxt_NoGroup      "B = -0.3, {&omega} = 0.02"
local hightxt_NoGroup     "B = -0.1, {&omega} = 0.02"

foreach t of local fig2_panels {
    local stub    "`stub_`t''"
    local lowtxt  "`lowtxt_`t''"
    local hightxt "`hightxt_`t''"

    preserve
    keep if treatmentname == "`t'"

    * Diagnostic kept from the original script, which ran it for Baseline only.
    if "`t'" == "Baseline" {
        tab predictguess if value == 60
    }

    twoway ///
        scatter guess value [w=weight] if value > lbound & value < hbound & highgroup == 1, ///
            msize(*0.15) mcolor(orange*0.5) msymbol(oh) || ///
        line predictguess value if value > lbound & value < hbound & highgroup == 1, ///
            lcolor(dkorange) || ///
        line predictguess value if value > lbound & value < hbound & highgroup == 0, ///
            lcolor(dkgreen) || ///
        line value value if value > lbound & value < hbound & highgroup == 0, ///
            lcolor(gray*0.7) || ///
        scatter guess value [w=weight] if value > lbound & value < hbound & highgroup == 0, ///
            msize(*0.15) mcolor(green*0.5) msymbol(oh) || ///
        rcap predictguess value value if value == 40 & highgroup == 0, ///
            lcolor(dkgreen) msymbol(oh) || ///
        rcap predictguess value value if value == 60 & highgroup == 1, ///
            lcolor(dkorange) msymbol(oh) ///
        xtitle("Type", size(medium)) ytitle("Mean Assessment", size(medium)) ///
        ylabel(25 (10) 75) ///
        legend(off) graphregion(color(white)) bgcolor(white) ///
        text(28 40 "`lowtxt'", color(dkgreen) size(medium)) ///
        text(70 55 "`hightxt'", color(dkorange) size(medium)) ///
        title("`t'", size(medsmall)) ///
        saving("$tempgraph/`stub'", replace)

    * Always hand back the collapsed data, including after the last panel.
    restore
}
 
 
 
     graph combine "$tempgraph/bl"  "$tempgraph/ng" , graphregion(color(white)) ///
scheme(s2mono) plotregion(fcolor(white)) row(1) title("Finite") ///
title("", size(medium)) ///
 note("" ///
 , size(tiny))  ///
  saving("$tempgraph/foo", replace) 
  
       graph combine "$tempgraph/bl"  "$tempgraph/ng" , graphregion(color(white)) ///
scheme(s2mono) plotregion(fcolor(white)) row(1) title("Finite") ///
title("", size(medium)) ///
  note("" ///
 , size(tiny))  
graph export "$text/files/guessbyvalue_blng_late_weighted.pdf", replace

 
      graph combine "$tempgraph/og" "$tempgraph/sf" , graphregion(color(white)) ///
scheme(s2mono) plotregion(fcolor(white)) row(1) title("Finite") ///
title("", size(medium)) ///
 note("" ///
 , size(tiny))  ///
   saving("$tempgraph/baz", replace) 
 
 
    graph combine "$tempgraph/foo"  "$tempgraph/baz" , graphregion(color(white)) ///
scheme(s2mono) plotregion(fcolor(white)) row(2) title("Finite") ///
title("", size(medium)) ///
 note("Green (Orange) dots are for low (high) value group. Green and Orange lines depict best linear fit by group and treatment; gray line depicts 45 degree line. Subjects with MSE > 200 excluded." ///
 , size(tiny))  
 
   
       graph combine "$tempgraph/og"  "$tempgraph/sf" , graphregion(color(white)) ///
scheme(s2mono) plotregion(fcolor(white)) row(1) title("Finite") ///
title("", size(medium)) ///
  note("" ///
 , size(vsmall))  
graph export "$text/files/guessbyvalue_ogsf_late_weighted.pdf", replace
///Green (Orange) dots are for low (high) value group. Green and Orange lines depict best linear fit by group and treatment; gray line depicts 45 degree line. Subjects with MSE > 200 excluded.

graph export "$text/files/guessbyvalue_new_late.pdf", replace



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
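///  FIGURE 3 Estimates of Representative Signal Distortion (Δ) and Base-Rate Neglect -- base-rate-neglect panel (exported as brnbytreatment_late.pdf)
/// this is used to make statistical statements using bootstrapping on var_signal, alpha_opt - alpha, delta; in this copy the bootstrap loop stores alpha_opt - alpha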
* Reload the late-rounds data for the bootstrap and drop filtered-out rows.
use "$data/eoydata_late.dta", clear
*use "$data/eoydata.dta", clear
tabulate treatment treatmentname
* treatment codes: 1. bl, 2. sf, 3. ng, 4. og
drop if filteredsample ==0

* tfoo = number of rows per treatment; tnobs = tfoo/38 is the resample size
* passed to bsample for each treatment stratum.
* NOTE(review): 38 is presumably the number of late rounds per subject (75 for
* the full data set, see the commented alternative) - confirm.
tempvar unit
generate byte `unit' = 1
egen tfoo = total(`unit'), by(treatment)
drop `unit'
generate tnobs = tfoo/38
*gen tnobs = tfoo/75

* Delta from the stored estimates, tabulated for the high group, then the low group.
generate delta = bias/(1- alpha)
forvalues g = 1(-1)0 {
    tabulate delta treatmentname if highgroup == `g'
}

* Regressors for the per-treatment regression inside the bootstrap: diff_v
* split by group, plus a column of ones that stands in for the constant.
generate diffv_h_bl = diff_v*highgroup
generate diffv_l_bl = diff_v*(1-highgroup)
replace d_bl = 1

* Bootstrap, 500 draws. Each draw resamples id clusters within treatment,
* re-estimates alpha and bias by treatment and group, rebuilds the implied
* signal, and stores one statistic per treatment x group in the constant
* variables gd1h_i ... gd4l_i of the full data set.
set seed 666
forvalues i = 1/500 {
    preserve
    bsample tnobs, strata(treatment) cluster(id)

    * Per-treatment regression without constant. Coefficient order:
    * 1 = slope high group, 2 = slope low group, 3 = d_bl, 4 = highgroup.
    forvalues j = 1/4 {
        regress diff diffv_h_bl diffv_l_bl d_bl highgroup if treatment == `j', noconstant cluster(id)
        matrix b = e(b)
        replace alpha = b[1,1] if highgroup == 1 & treatment == `j'
        replace alpha = b[1,2] if highgroup == 0 & treatment == `j'
        replace bias = b[1,3] + b[1,4] if highgroup == 1 & treatment == `j'
        replace bias = b[1,3] if highgroup == 0 & treatment == `j'
    }

    * Signal implied by the re-estimated parameters, and its error.
    replace signal = (guess - bias - alpha*mean_theory) / (1- alpha)
    generate sigerror = signal - value

    * Alternative statistics (enable exactly one definition of vs_x):
    *   variance:  gen foo = (sigerror)^2
    *              egen vs_x = mean(foo), by(highgroup treatment)
    *   Delta_g:   gen foo = bias/(1-alpha)
    *              egen vs_x = mean(foo), by(highgroup treatment)
    *   alpha:     gen vs_x = alpha
    * Active statistic: alpha opt - alpha, with the mean squared signal error
    * as the signal variance.
    generate foo = (sigerror)^2
    egen vs_x = mean(foo), by(highgroup treatment)
    replace vs_x = (1/var_theory)/ ((1/vs_x) + (1/var_theory)) - alpha

    * Pull the statistic of every treatment x group cell into a local macro.
    foreach g in h l {
        local hg = cond("`g'" == "h", 1, 0)
        forvalues t = 1/4 {
            generate foo`t'_`g' = vs_x if treatment == `t' & highgroup == `hg'
            egen gd`g'`t'_x = max(foo`t'_`g')
            local xxx`g'`t' = gd`g'`t'_x
        }
    }
    restore

    * Back on the full data: store this draw as constant columns.
    foreach g in h l {
        forvalues t = 1/4 {
            generate gd`t'`g'_`i' = `xxx`g'`t''
        }
    }
}


* ---------------------------------------------------------------------------
* Point estimate (diff_orig), reshape of the bootstrap draws to long form, and
* percentile bounds.
*
* diff_orig must be the same statistic that the bootstrap loop stored in the
* gd* columns. In this section the loop stores alpha opt - alpha, and the
* graph below is labelled and exported accordingly. Previously diff_orig was
* set to bias/(1-alpha), so the bars and the p-value centring used a different
* statistic from the bootstrap draws.
* Alternatives, to be switched together with the loop:
*   alpha:            gen diff_orig = alpha
*   signal variance:  gen diff_orig = var_signal
*   delta_g:          gen diff_orig = bias/(1- alpha)
* ---------------------------------------------------------------------------
gen diff_orig = alpha_opt - alpha

* One row per treatment x group; the gd* columns are constants, so their means
* are the stored bootstrap values.
collapse gd*  alpha alpha_opt bias var_signal diff_orig, by(treatment treatmentname highgroup)

* Long form: one row per treatment x group x bootstrap draw (index).
reshape long gd1h_ gd2h_ gd3h_ gd4h_ gd1l_ gd2l_ gd3l_ gd4l_ , i(treatment treatmentname highgroup) j(index)

* Strip the trailing underscore left by the reshape stubs.
foreach g in h l {
    forvalues t = 1/4 {
        rename gd`t'`g'_ gd`t'`g'
    }
}

* Each row keeps only the draw belonging to its own treatment x group cell.
* 999 is a sentinel for rows matched by none of the conditions.
gen var_signal_boot = 999
foreach g in h l {
    local hg = cond("`g'" == "h", 1, 0)
    forvalues t = 1/4 {
        replace var_signal_boot = gd`t'`g' if treatment == `t' & highgroup == `hg'
    }
}

tab var_signal treatmentname if highgroup ==1
tab var_signal treatmentname if highgroup ==0

* diff = bootstrap draw of the statistic; lowgroup is the plotting position.
gen diff = var_signal_boot
gen lowgroup = 1-highgroup

bysort treatmentname highgroup: summarize diff, detail

* 2.5th and 97.5th percentiles of the draws per treatment x group.
sort treatmentname highgroup
by treatmentname highgroup: egen ub = pctile(diff), p(97.5)
by treatmentname highgroup: egen lb = pctile(diff), p(2.5)


* ---------------------------------------------------------------------------
* Bootstrap p-values. Each test centres the bootstrap draws on the point
* estimate and reports the share of draws whose centred value is larger in
* absolute value than the point estimate.
*
* In the two comparison tests the reference value (high group, or Baseline) is
* copied to the other rows through a helper that is missing everywhere else.
* Previously egen took max(diff) / max(diff_orig) instead of the max of that
* helper, so the "reference" was the largest value across both groups (or all
* treatments) rather than the reference group's own value.
* ---------------------------------------------------------------------------

// p-values for whether values are different from zero
preserve
replace diff = diff - diff_orig
gen dummy = 0
replace dummy = 1 if diff< -abs(diff_orig) |  diff> abs(diff_orig)
egen pvalue = mean(dummy), by(treatmentname highgroup)
by treatmentname highgroup: summarize pvalue
restore

// p-values for whether values are different between high and low groups
preserve
* High-group draw, matched to the low-group row of the same treatment and draw.
gen foo = diff
replace foo = . if lowgroup ==1
egen diffhigh = max(foo), by(treatmentname index)
drop foo
* High-group point estimate of the same treatment.
gen foo = diff_orig
replace foo = . if lowgroup ==1
egen diffhigh_orig = max(foo), by(treatmentname)
drop foo
drop if highgroup ==1
gen diffnew = diffhigh - diff
gen diffnew_orig = diffhigh_orig - diff_orig
replace diffnew = diffnew - diffnew_orig
gen dummy = 0
replace dummy = 1 if diffnew< -abs(diffnew_orig) |  diffnew> abs(diffnew_orig)
egen pvalue = mean(dummy), by(treatmentname)
by treatmentname: summarize pvalue
restore

// p-values for whether values are different between treatments
//1. bl, 2. sf, 3.ng, 4. og
preserve
* Baseline (treatment 1) draw, matched by group and draw.
gen foo = diff
replace foo = . if treatment >1
egen diffbl = max(foo), by(highgroup index)
drop foo
* Baseline point estimate of the same group.
gen foo = diff_orig
replace foo = . if treatment >1
egen diffbl_orig = max(foo), by(highgroup)
drop foo
drop if treatmentname =="Baseline"
gen diffnew = diffbl - diff
gen diffnew_orig = diffbl_orig - diff_orig
replace diffnew = diffnew - diffnew_orig
gen dummy = 0
replace dummy = 1 if diffnew< -abs(diffnew_orig) |  diffnew> abs(diffnew_orig)
egen pvalue = mean(dummy), by(treatmentname highgroup)
by treatmentname highgroup: summarize pvalue
restore
 
* One row per treatment x group: percentile bounds, mean bootstrap draw and
* point estimate.
collapse (mean) ub lb diff diff_orig, by(treatmentname lowgroup)

* Bar chart of diff_orig by group (orange = high group, green = low group)
* with capped spikes from lb to ub, one panel per treatment.
graph twoway ///
    (bar diff_orig lowgroup if lowgroup ==0, color(orange) ///
        xtitle("") ytitle("") xlabel(none) xscale(lstyle(none)) ///
        graphregion(color(white)) bgcolor(white)) ///
    (bar diff_orig lowgroup if lowgroup ==1, color(green) ///
        xtitle("") xlabel(none) ylabel(-0.4(0.1)0.4) ///
        ytitle("{&omega}{sup:Bay} - {&omega}", size(medium)) xscale(lstyle(none)) ///
        graphregion(color(white)) bgcolor(white)) ///
    (rcap ub lb lowgroup, color(black)), ///
    by(treatmentname, row(1) legend(off) graphregion(color(white)) bgcolor(white) note("")) ///
    subtitle(,bcolor(white)) legend(region(lcol(white))) ///
    yline(0, lstyle(foreground) lcolor(black) lwidth(0.5))
graph export "$text/files/brnbytreatment_late.pdf", replace



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
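///  FIGURE 3 Estimates of Representative Signal Distortion (Δ) and Base-Rate Neglect -- Δ panel (exported as deltabytreatment_late.pdf)
/// this is used to make statistical statements using bootstrapping on var_signal, alpha_opt - alpha, delta; in this copy the bootstrap loop stores delta = bias/(1-alpha)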
* Reload the late-rounds data for the delta bootstrap and drop filtered-out rows.
use "$data/eoydata_late.dta", clear
*use "$data/eoydata.dta", clear
tabulate treatment treatmentname
* treatment codes: 1. bl, 2. sf, 3. ng, 4. og
drop if filteredsample ==0

* tfoo = number of rows per treatment; tnobs = tfoo/38 is the resample size
* passed to bsample for each treatment stratum.
* NOTE(review): 38 is presumably the number of late rounds per subject (75 for
* the full data set, see the commented alternative) - confirm.
tempvar unit
generate byte `unit' = 1
egen tfoo = total(`unit'), by(treatment)
drop `unit'
generate tnobs = tfoo/38
*gen tnobs = tfoo/75

* Delta from the stored estimates, tabulated for the high group, then the low group.
generate delta = bias/(1- alpha)
forvalues g = 1(-1)0 {
    tabulate delta treatmentname if highgroup == `g'
}

* Regressors for the per-treatment regression inside the bootstrap: diff_v
* split by group, plus a column of ones that stands in for the constant.
generate diffv_h_bl = diff_v*highgroup
generate diffv_l_bl = diff_v*(1-highgroup)
replace d_bl = 1


* Bootstrap, 500 draws. Each draw resamples id clusters within treatment,
* re-estimates alpha and bias by treatment and group, and stores
* bias/(1-alpha) per treatment x group in the constant variables
* gd1h_i ... gd4l_i of the full data set.
set seed 666
forvalues i = 1/500 {
    preserve
    bsample tnobs, strata(treatment) cluster(id)

    * Per-treatment regression without constant. Coefficient order:
    * 1 = slope high group, 2 = slope low group, 3 = d_bl, 4 = highgroup.
    forvalues j = 1/4 {
        regress diff diffv_h_bl diffv_l_bl d_bl highgroup if treatment == `j', noconstant cluster(id)
        matrix b = e(b)
        replace alpha = b[1,1] if highgroup == 1 & treatment == `j'
        replace alpha = b[1,2] if highgroup == 0 & treatment == `j'
        replace bias = b[1,3] + b[1,4] if highgroup == 1 & treatment == `j'
        replace bias = b[1,3] if highgroup == 0 & treatment == `j'
    }

    * Signal implied by the re-estimated parameters, and its error. Only the
    * commented variance alternative below uses sigerror.
    replace signal = (guess - bias - alpha*mean_theory) / (1- alpha)
    generate sigerror = signal - value

    * Alternative statistics (enable exactly one definition of vs_x):
    *   variance:  gen foo = (sigerror)^2
    *              egen vs_x = mean(foo), by(highgroup treatment)
    *   alpha:     gen vs_x = alpha
    * Active statistic: Delta_g.
    generate foo = bias/(1-alpha)
    egen vs_x = mean(foo), by(highgroup treatment)

    * Pull the statistic of every treatment x group cell into a local macro.
    foreach g in h l {
        local hg = cond("`g'" == "h", 1, 0)
        forvalues t = 1/4 {
            generate foo`t'_`g' = vs_x if treatment == `t' & highgroup == `hg'
            egen gd`g'`t'_x = max(foo`t'_`g')
            local xxx`g'`t' = gd`g'`t'_x
        }
    }
    restore

    * Back on the full data: store this draw as constant columns.
    foreach g in h l {
        forvalues t = 1/4 {
            generate gd`t'`g'_`i' = `xxx`g'`t''
        }
    }
}


* Point estimate matching the statistic stored by the bootstrap loop.
* Alternatives, to be switched together with the loop:
*   alpha:              gen diff_orig = alpha
*   alpha_opt vs alpha: gen diff_orig = alpha_opt - alpha
*   signal variance:    gen diff_orig = var_signal
* Active: delta_g.
generate diff_orig =  bias/(1- alpha)

* One row per treatment x group; the gd* columns are constants, so their means
* are the stored bootstrap values.
collapse (mean) gd*  alpha alpha_opt bias var_signal diff_orig, by(treatment treatmentname highgroup)

* Long form: one row per treatment x group x bootstrap draw (index).
reshape long gd1h_ gd2h_ gd3h_ gd4h_ gd1l_ gd2l_ gd3l_ gd4l_ , i(treatment treatmentname highgroup) j(index)

* Strip the trailing underscore left by the reshape stubs.
foreach g in h l {
    forvalues t = 1/4 {
        rename gd`t'`g'_ gd`t'`g'
    }
}

* Each row keeps only the draw belonging to its own treatment x group cell.
* 999 is a sentinel for rows matched by none of the conditions.
generate var_signal_boot = 999
foreach g in h l {
    local hg = cond("`g'" == "h", 1, 0)
    forvalues t = 1/4 {
        replace var_signal_boot = gd`t'`g' if treatment == `t' & highgroup == `hg'
    }
}

forvalues g = 1(-1)0 {
    tabulate var_signal treatmentname if highgroup == `g'
}

* diff = bootstrap draw of the statistic; lowgroup is the plotting position.
generate diff = var_signal_boot
generate lowgroup = 1-highgroup

bysort treatmentname highgroup: summarize diff, detail

* 2.5th and 97.5th percentiles of the draws per treatment x group.
sort treatmentname highgroup
by treatmentname highgroup: egen ub = pctile(diff), p(97.5)
by treatmentname highgroup: egen lb = pctile(diff), p(2.5)


* ---------------------------------------------------------------------------
* Bootstrap p-values. Each test centres the bootstrap draws on the point
* estimate and reports the share of draws whose centred value is larger in
* absolute value than the point estimate.
*
* In the two comparison tests the reference value (high group, or Baseline) is
* copied to the other rows through a helper that is missing everywhere else.
* Previously egen took max(diff) / max(diff_orig) instead of the max of that
* helper, so the "reference" was the largest value across both groups (or all
* treatments) rather than the reference group's own value.
* ---------------------------------------------------------------------------

// p-values for whether values are different from zero
preserve
replace diff = diff - diff_orig
gen dummy = 0
replace dummy = 1 if diff< -abs(diff_orig) |  diff> abs(diff_orig)
egen pvalue = mean(dummy), by(treatmentname highgroup)
by treatmentname highgroup: summarize pvalue
restore

// p-values for whether values are different between high and low groups
preserve
* High-group draw, matched to the low-group row of the same treatment and draw.
gen foo = diff
replace foo = . if lowgroup ==1
egen diffhigh = max(foo), by(treatmentname index)
drop foo
* High-group point estimate of the same treatment.
gen foo = diff_orig
replace foo = . if lowgroup ==1
egen diffhigh_orig = max(foo), by(treatmentname)
drop foo
drop if highgroup ==1
gen diffnew = diffhigh - diff
gen diffnew_orig = diffhigh_orig - diff_orig
replace diffnew = diffnew - diffnew_orig
gen dummy = 0
replace dummy = 1 if diffnew< -abs(diffnew_orig) |  diffnew> abs(diffnew_orig)
egen pvalue = mean(dummy), by(treatmentname)
by treatmentname: summarize pvalue
restore

// p-values for whether values are different between treatments
//1. bl, 2. sf, 3.ng, 4. og
preserve
* Baseline (treatment 1) draw, matched by group and draw.
gen foo = diff
replace foo = . if treatment >1
egen diffbl = max(foo), by(highgroup index)
drop foo
* Baseline point estimate of the same group.
gen foo = diff_orig
replace foo = . if treatment >1
egen diffbl_orig = max(foo), by(highgroup)
drop foo
drop if treatmentname =="Baseline"
gen diffnew = diffbl - diff
gen diffnew_orig = diffbl_orig - diff_orig
replace diffnew = diffnew - diffnew_orig
gen dummy = 0
replace dummy = 1 if diffnew< -abs(diffnew_orig) |  diffnew> abs(diffnew_orig)
egen pvalue = mean(dummy), by(treatmentname highgroup)
by treatmentname highgroup: summarize pvalue
restore
 
* One row per treatment x group: percentile bounds, mean bootstrap draw and
* point estimate.
collapse (mean) ub lb diff diff_orig, by(treatmentname lowgroup)

* Bar chart of diff_orig by group (orange = high group, green = low group)
* with capped spikes from lb to ub, one panel per treatment.
graph twoway ///
    (bar diff_orig lowgroup if lowgroup ==0, color(orange) ///
        xtitle("") ytitle("") xlabel(none) xscale(lstyle(none)) ///
        graphregion(color(white)) bgcolor(white)) ///
    (bar diff_orig lowgroup if lowgroup ==1, color(green) ///
        xtitle("") xlabel(none) ylabel(-3(1)3) ///
        ytitle("{&Delta}{sub:}", size(medium)) xscale(lstyle(none)) ///
        graphregion(color(white)) bgcolor(white)) ///
    (rcap ub lb lowgroup, color(black)), ///
    by(treatmentname, row(1) legend(off) graphregion(color(white)) bgcolor(white) note("")) ///
    subtitle(,bcolor(white)) legend(region(lcol(white))) ///
    yline(0, lstyle(foreground) lcolor(black) lwidth(0.5))
graph export "$text/files/deltabytreatment_late.pdf", replace



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///// FIGURE 4 Estimates of Representative Signal Distortion


************************************
* Keep one row per id x highgroup pair from the late-rounds data.
use "$data/eoydata_late.dta", clear
sort id highgroup
by id highgroup: generate round_bygroup = _n
drop if round_bygroup > 1

* From here on bias_i holds bias_i/(1 - alpha_i), the quantity plotted on the
* x-axis of the CDF panels.
replace bias_i = bias_i/(1- alpha_i)

* Plot window for the rescaled bias_i.
generate lb = -10
generate hb = 10

* Two-sample Kolmogorov-Smirnov tests on the rescaled bias_i.

* High vs low group within each treatment.
foreach t in NoGroup OneGroup Baseline SignalFirst {
    preserve
    keep if treatmentname == "`t'"
    ksmirnov bias_i, by(highgroup)
    restore
}

* SignalFirst vs Baseline within each group (high group first).
forvalues g = 1(-1)0 {
    preserve
    keep if highgroup == `g'
    keep if inlist(treatmentname, "SignalFirst", "Baseline")
    ksmirnov bias_i, by(treatment)
    restore
}
* ---------------------------------------------------------------------------
* Figure 4 panels: empirical CDF of the rescaled bias_i by group, one panel
* per treatment, with a vertical line at each group's median
* (orange = highgroup 1, green = highgroup 0). Each panel is saved under
* $tempgraph for the graph combine call below.
*
* Changes relative to the four copy-pasted versions this replaces:
*   - the sort() keys of the two lines were swapped: the cu_h line was sorted
*     on cu_l, which is missing for exactly the plotted observations (and vice
*     versa), so the drawing order depended on whatever order the data
*     happened to be in. Each line is now sorted on its own CDF variable.
*   - the last preserve was never restored, so the next preserve in the same
*     run stopped with "already preserved".
* The NoGroup panel keeps its narrower x-axis labels, as before.
* ---------------------------------------------------------------------------
local fig4_panels SignalFirst Baseline OneGroup NoGroup

* File stub of the saved temp graph for each panel.
local stub_SignalFirst "sf"
local stub_Baseline    "bl"
local stub_OneGroup    "og"
local stub_NoGroup     "ng"

* x-axis label rule for each panel.
local xlab_SignalFirst "-10(2)10"
local xlab_Baseline    "-10(2)10"
local xlab_OneGroup    "-10(2)10"
local xlab_NoGroup     "-6(2)6"

* x-axis title, held in a macro so the loop body contains no literal braces.
local fig4_xtitle "{&Delta}{sub:g}"

foreach t of local fig4_panels {
    local stub "`stub_`t''"
    local xlab "`xlab_`t''"

    preserve
    keep if treatmentname == "`t'"
    sort bias_i highgroup

    * Group medians, spread to all rows so they can be read into macros.
    egen mfoo = median(bias_i), by(highgroup)
    gen xxh = mfoo
    gen xxl = mfoo
    replace xxh = . if highgroup ==0
    replace xxl = . if highgroup ==1
    egen bazl = max(xxl)
    egen bazh = max(xxh)
    local mfool = bazl
    local mfooh = bazh

    ksmirnov bias_i, by(highgroup)

    * Empirical CDFs; each variable is missing outside its own group.
    cumul bias_i if highgroup ==1, gen(cu_h)
    cumul bias_i if highgroup ==0, gen(cu_l)

    line cu_h bias_i if highgroup ==1 & bias_i >lb & bias_i < hb, sort(cu_h) lcolor(dkorange) ///
        || line cu_l bias_i if highgroup ==0 & bias_i >lb & bias_i < hb, sort(cu_l) lcolor(dkgreen) ///
        t1title("`t'" , size(medium)) xtitle("`fig4_xtitle'", size(medium)) ///
        graphregion(color(white)) bgcolor(white) ///
        xline(`mfool', lcolor(dkgreen) lpattern(solid)) ///
        xline(`mfooh', lcolor(dkorange) lpattern(solid)) ///
        legend(off) ///
        xlabel(`xlab') ///
        ytitle("Cdf", size(medium)) ///
        saving("$tempgraph/`stub'", replace)

    * Always hand back the full data, including after the last panel.
    restore
}
	  
* Assemble the four saved CDF panels in two rows and export the figure.
graph combine ///
    "$tempgraph/bl" ///
    "$tempgraph/ng" ///
    "$tempgraph/og" ///
    "$tempgraph/sf", ///
    graphregion(color(white)) ///
    scheme(s2mono) ///
    plotregion(fcolor(white)) ///
    row(2) ///
    title("") ///
    title("", size(medium)) ///
    note("", size(vsmall))
graph export "$text/files/cdfofdeltas_late.pdf", replace



////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///// FIGURE 5 Mean Squared Error and Group Difference by Treatment

//non parametric approach
* Nonparametric group difference (gd) and mean MSE by treatment.

* Step 1: grid of 100 x 100 (value, guess) pairs, both running from 1 to 100.
clear
set obs 10000
generate value = ceil(_n/100)
generate guess = mod(_n - 1, 100) + 1

* p(v): mass of an equal-weight mixture of normals centred at 40 and 60, each
* with standard deviation 10, on the unit interval around value; then
* rescaled to sum to one across values for every guess.
generate prob_value = 1/2*(normal((value +0.5 - 40)/10) - normal((value -0.5 - 40)/10)) +1/2*(normal((value +0.5 - 60)/10) - normal((value -0.5 - 60)/10))
egen tt = total(prob_value), by(guess)
replace prob_value = prob_value/tt

* Diagnostic only: total of prob_value over the rows with value from 30 to 70.
preserve
keep if inrange(value, 30, 70)
egen xx = total(prob_value)
tabulate xx
restore
drop tt

* Step 2: attach p(v) to the experimental data by value, then drop the grid
* rows (they have no treatment).
append using  "$data/eoydata_late.dta"
*append using "$data/eoydata_early.dta"
rename prob_value foo
egen prob_value = max(foo), by(value)
keep if treatment != .
keep if filteredsample ==1
*keep if treatmentname == "Baseline"

* Step 3: keep treatment x value cells with more than 9 rows in each group.
generate lgroup = highgroup==0
egen vlcount = total(lgroup), by(treatment value)
egen vhcount = total(highgroup), by(treatment value)
egen meanmse = mean(mse), by(treatment)
drop if !(vlcount>9 & vhcount >9)

* Step 4: cell means, and p(v) renormalised over the surviving values.
collapse (mean) prob_value guess alpha bias meanmse, by(value highgroup treatmentname)
egen tt = total(prob_value), by(treatmentname highgroup)
generate prob_valuesy = prob_value/tt

* Step 5: gd = probability-weighted sum over values of the high-group mean
* guess minus the low-group mean guess.
generate lowonly = guess*(highgroup==0)
egen lguess = max(lowonly), by(value treatmentname)
drop lowonly
keep if highgroup != 0
generate diff  = guess - lguess
generate baz = diff *prob_valuesy
egen gd = total(baz), by(treatmentname)

tabulate gd treatmentname
tabulate meanmse treatmentname

* One row per treatment with gd and meanmse.
collapse (mean) gd meanmse, by(treatmentname)
*save "$data/measures.dta", replace
save "$data/measures_late.dta", replace
*save "$data/measures_early.dta", replace
*save "$data/measures_early.dta", replace




* ---------------------------------------------------------------------------
* Import the simulated frontier output (alpha and beta both free) and save
* each text file as its own Stata data set.
*
* Previously the five import commands ran back to back, each with clear, so
* only the last file (..._median_late) was in memory, and the five save
* commands then wrote that same data under all five names. Each file is now
* imported, renamed and saved on its own. The source-to-target pairing follows
* the order of the original import and save lists.
* Column v5 is left under its imported name, as before.
* ---------------------------------------------------------------------------
local fr_src frontier_alphabeta_all frontier_alphabeta_all_early frontier_alphabeta_all_late frontier_alphabeta_all_median frontier_alphabeta_all_median_late
local fr_dst frontier_xxx frontier_early frontier_late frontier_median frontier_median_late

local fr_n : word count `fr_src'
forvalues k = 1/`fr_n' {
    local src : word `k' of `fr_src'
    local dst : word `k' of `fr_dst'

    import delimited "$m/output/`src'.txt", clear
    rename v1 alpha_l
    rename v2 alpha_h
    rename v3 beta_l
    rename v4 beta_h
    rename v6 mse
    rename v7 gd
    save "$data/`dst'.dta", replace
}

* ---------------------------------------------------------------------------
* Import the simulated frontier_alpha output and save each text file as its
* own Stata data set.
*
* Previously the five import commands ran back to back, each with clear, so
* only the last file (..._median_late) was in memory, and the five save
* commands then wrote that same data under all five names. Each file is now
* imported, renamed and saved under the matching name.
* Column v5 is left under its imported name, as before.
* ---------------------------------------------------------------------------
local fra_files frontier_alpha frontier_alpha_early frontier_alpha_late frontier_alpha_median frontier_alpha_median_late

foreach f of local fra_files {
    import delimited "$m/output/`f'.txt", clear
    rename v1 alpha_l
    rename v2 alpha_h
    rename v3 beta_l
    rename v4 beta_h
    rename v6 mse
    rename v7 gd
    save "$data/`f'.dta", replace
}

* ---------------------------------------------------------------------------
* Reduce each frontier_alpha data set to its lower envelope (minimum mse for
* each rounded gd), keep the point closest to the estimated no-bias weights,
* and save the result with betaone = 1.
*
* Previously the four use commands ran back to back (only the last data set
* stayed in memory), diff was generated three times in a row (which stops
* with "already defined"), and one result was saved under four names. Each
* variant is now processed on its own, with its own target weights.
*
* Target (alpha_l, alpha_h) per variant, from the original comments:
*   early: 0.17, 0.18      late: 0.20, 0.16      general: 0.18, 0.18
* NOTE(review): the original gave no targets for the two median variants.
* This uses the general pair for median and the late pair for median_late -
* confirm before relying on those two files.
* ---------------------------------------------------------------------------
local fa_variants early late median median_late

local al_early       0.17
local ah_early       0.18
local al_late        0.20
local ah_late        0.16
local al_median      0.18
local ah_median      0.18
local al_median_late 0.20
local ah_median_late 0.16

foreach v of local fa_variants {
    use "$data/frontier_alpha_`v'.dta", clear

    * Round so that points can be grouped by gd and compared on mse.
    replace mse = round(mse, 0.01)
    replace gd = round(gd, 0.1)

    * dummy marks the minimum-mse point for each gd value.
    egen minmse= min(mse), by(gd)
    egen minminmse = min(mse)
    gen dummy = mse ==minmse

    * nobias marks the grid point closest to the target weights; it is kept
    * even when it is not on the envelope.
    gen diff = (alpha_l - `al_`v'')^2 + (alpha_h - `ah_`v'')^2
    egen mindiff = min(diff)
    gen nobias = diff ==mindiff
    replace dummy = 1 if nobias ==1
    keep if dummy ==1

    * Drop envelope points with gd beyond the overall mse minimum, except the
    * nobias point.
    gen dummy2 = mse ==minminmse
    gen foo = dummy2*gd
    egen maxgd = max(foo)
    gen tobedropped = gd >maxgd
    replace tobedropped = 0 if nobias == 1
    drop if tobedropped == 1
    *tw scatter mse gd

    gen betaone = 1
    save "$data/frontier_alpha_formatted_`v'.dta", replace
}

* ---------------------------------------------------------------------------
* Reduce each frontier data set to its lower envelope (minimum mse for each
* rounded gd), cut off points with gd beyond the overall mse minimum, and save
* the result with betaone = 0.
*
* Previously the four use commands ran back to back, so only the last data set
* (frontier_median_late) was processed and then saved under all four names.
* Each variant is now read, formatted and saved under its own name.
* ---------------------------------------------------------------------------
local fr_variants early late median median_late

foreach v of local fr_variants {
    use "$data/frontier_`v'.dta", clear

    * Round so that points can be grouped by gd and compared on mse.
    replace mse = round(mse, 0.01)
    replace gd = round(gd, 0.1)

    * Keep the minimum-mse point for each gd value.
    egen minmse= min(mse), by(gd)
    egen minminmse = min(mse)
    gen dummy = mse ==minmse
    keep if dummy ==1

    * Drop envelope points with gd beyond the overall mse minimum.
    gen dummy2 = mse ==minminmse
    gen foo = dummy2*gd
    egen maxgd = max(foo)
    drop if gd >maxgd
    *tw scatter mse gd

    gen betaone = 0
    save "$data/frontier_formatted_`v'.dta", replace
}



/// Early vs Late

* Figure 5 scatter: bootstrapped (mse, gd) clouds per treatment, treatment
* means, the frontier line and labelled benchmark points, saved as the temp
* graph "late".

* Early-rounds inputs. NOTE(review): the use ..., clear three lines further
* down replaces this data immediately, so as written only the late-rounds
* inputs reach the graph; the three files must still exist for the script to run.
use "$data/frontier_alpha_formatted_early.dta", clear
append using "$data/frontier_formatted_early.dta"
append using "$data/bootstrapped_msegd_early.dta"

* Late-rounds inputs: formatted frontiers (betaone 1 and 0) plus bootstrap draws.
use "$data/frontier_alpha_formatted_late.dta", clear
append using "$data/frontier_formatted_late.dta"
append using "$data/bootstrapped_msegd_late.dta"

* new = 1 on rows without alpha_l.
* NOTE(review): presumably these are exactly the bootstrapped rows - confirm.
gen new =1 if alpha_l ==.

* Drop the appended index and add the treatment-level measures (gd, meanmse);
* index is re-created below with a different meaning.
drop index
append using "$data/measures_late.dta"
* Constant string variables used as marker labels in the graph.
gen label_bl="Baseline"
gen label_sf="SignalFirst"
gen label_og="OneGroup"
gen label_ng="NoGroup"
gen label_bay="Bayesian"
gen label_nobias="NoBias"
gen label_pbrn="pBRN"
gen label_nodisc="OptNoDiscrimination"
* Clock positions for the marker labels.
gen pos1 = 6
gen pos2 = 5
* index = 1 on every row that is not flagged new (frontier and measures rows).
gen index =1 if new !=1
* Benchmark flags: zero-gd point with betaone 1 (pbrn) and with betaone 0 (nodisc).
gen pbrn = 1 if gd == 0 & betaone ==1
gen nodisc = 1 if gd == 0 & betaone ==0
* bay: among betaone-0 rows at the overall minimum mse, keep the one with the largest gd.
gen bay = 1 if mse ==minminmse & betaone ==0
egen xx = max(gd), by(bay)
replace bay = 0 if gd !=xx
* Trim to the plotted range.
drop if gd <-1 | gd > 10
drop if mse >85 & new==1
* Keep only the first nodisc row in current row order.
gen foox = _n
egen xxx = min(foox), by(nodisc)
drop if foox >xxx & nodisc ==1
	
	twoway ///
			 scatter mse gd if new==1 & treatment ==4, color(green%10) lcolor(blue%10)   mlwidth(none) msize(small) msymbol(t) mlabv(pos1) ///
||	 	 scatter mse gd if new==1 & treatment ==3, color(red%10) msize(small) mlwidth(none) msymbol(s) mlabv(pos1) ///
||	 scatter mse gd if new==1 & treatment ==1, mcolor(blue%10) msize(small)  mlwidth(none) msymbol(o) mlabv(pos1) ///
||  scatter mse gd if new==1 & treatment ==2, mcolor(green%10) msize(small)  mlwidth(none) msymbol(d) mlabv(pos1) ///
 || scatter meanmse gd if (index ==1 & treatmentname =="Baseline"), mcolor(blue) msize(large) msymbol(circle) mlabel("label_bl") mlabv(pos1) mlabsize(3) ///
|| scatter meanmse gd if (index ==1 & treatmentname =="NoGroup"), mcolor(red) msize(large) msymbol(circle) mlabel("label_ng") mlabv(pos2) mlabsize(3) ///
|| scatter meanmse gd if (index ==1 & treatmentname =="SignalFirst"), mcolor(green) msize(large) msymbol(diamond) mlabel("label_sf") mlabv(pos1) mlabsize(3)  ///
|| scatter meanmse gd if (index ==1 & treatmentname =="OneGroup"), mcolor(green) msize(large) msymbol(triangle) mlabel("label_og") mlabv(pos1) mlabsize(3)  ///
|| line mse gd if (meanmse ==. & betaone ==0), lcolor(eltgreen*.9) ///
|| scatter mse gd if (bay ==1), mcolor(b) msize(medium) msymbol(square) mlabel("label_bay") mlabsize(3)  mlabv(pos1) ///
|| scatter mse gd if (pbrn ==1), mcolor(b) msize(medium) msymbol(square) mlabel("label_pbrn") mlabsize(3) mlabv(pos1) ///
|| scatter mse gd if (nobias ==1), mcolor(b) msize(medium) msymbol(square) mlabel("label_nobias") mlabsize(3) mlabv(pos1) ///
|| scatter mse gd if (nodisc ==1), mcolor(b) msize(medium) msymbol(square) mlabel("label_nodisc") mlabsize(3) mlabv(pos1) ///
  ytitle(CDF) ytitle("Inaccuracy (MSE)", size(medium) height(7)) xtitle("Discrimination (GD)", size(medium)) ///
	   legend(off) ylabel(35(10)85, labsize(small)) plotregion(style(none) ) ///
	   scheme(s1mono) xlabel(-1 (1) 10, valuelabel labsize(small))  legend(region(lcolor(black) lwidth(vthin) lpattern(dash))) legend(size(small)) ///
	   title("", size(medium)) ///
  saving("$tempgraph/late", replace) 
	   

	//graph export "$text/files/newsummarygraph_late_formaintext.pdf", replace  
	
	
	
	
	
	

